Define variables for coding stoves and for filtering out data.
- Also set some constants for the stove event analysis
reimport_data <- FALSE #Set to TRUE if we need to reimport data SUMSARIZED, but this is slower. Set to FALSE if already imported and no new data has been added.
source('../r_files/parameter_file_Nigeria_AfDB.R')
Removed_Houses <- print(HHID_remove)
## [1] "^SHO_03$|^SHO_04$|^SHO_08$|^SHO_10$|^MUS_01$|^AKO_01$|^AKO_02$|^AKO_03$|^AKO_05$|^AKO_06$|^AKO_07$|^AKO_08$"
Files_with_these_strings_removed <- print(bad_files)
## [1] "0_0_3_P83601_IndiaDMYminave|0_0_3_P46673_Indiaminaves|AKOKA DOWNLOAD 5_CC/|AKOKA DOWNLOAD|AKOKA DOWNLOAD 5_KE/|AKOKA DOWNLOAD 5_CC/|AKOKA DOWNLOAD 5_LPG/|MUSHIN DOWNLOAD 5_CC/|MUSHIN DOWNLOAD 5|MUSHIN DOWNLOAD 5_KE/|SHOMOLU DOWNLOAD 5|SHOMOLU DOWNLOAD 5_CC/|SHOMOLU DOWNLOAD 5_KE/|SHOMOLU DOWNLOAD 5_LPG/|Akoka fourth download CC/|Akoka fouth download|Akoka fourth download|Akoka fourth download LPG/|Mushin fourth download CC/|Mushin fourth download KE/|Shomolu fourth download|Shomolu fourth download CC/|Shomolu fourth download KE/|Shomolu fourth download LPG/"
print(stove_codes)
## stove stove_descriptions
## 1 CC CleanCook
## 2 CC(B) CleanCook
## 3 LP LPG
## 4 LPG LPG
## 5 KE Kerosene
## 6 KE1(B) Kerosene
## 7 KE2(B) Kerosene
## 8 KE(B) Kerosene
## 9 AMB Ambient
## 10 AMB(B) Ambient
## 11 KE1 Kerosene
## 12 KE1 Kerosene
## 13 KE2 Kerosene
## 14 KE(1) Kerosene
## 15 KE(2) Kerosene
source('../r_scripts/load.R')
source("../r_scripts/functions.R")
source("../r_scripts/load_data.r")
source("../r_scripts/plots.R")
source("../r_scripts/filter_sum_data.R")
Import temperature data and metadata files
Tidy
- add household id information, get metadata from metadata file and from file names, depending on country
#Import thermocouple data time series output from Sumsarizer.
#datetime_placed is currently based on the metadata. Flags data as good/bad based on the bad_files and HHID_remove variables. Those are reapplied directly to the cooking events later, and filtered out if bad.
if ( reimport_data == TRUE) {
sumsarized_filtered <- filter_sumsarized(sumsarized,metadata,bad_files,HHID_remove,project_name,stove_codes) #HHID is grabbed from the filename.
saveRDS(sumsarized_filtered, file ="../r_files/sumsarized_filtered.RDS")
} else {
sumsarized_filtered <- readRDS("../r_files/sumsarized_filtered.RDS") #
}
- Form cooking events from raw sumsarizer output
- Group distinct cooking events together. If they are within cooking_group minutes of each other.
- Only need this for data that went through the web-based sumsarizer, where events are not made automatically.
# # start counting up for cooking events. Add columns for start and stop times of cooking events, and keep the hhid, loggerid, stove type, field group. Keep only cooking event time points. Need a case for start (if i = 1), if data point is within 30 minutes of the last one, if data point is greater than 30 minutes from the last one.
# #Initialize the cooking_events frame.
cooking_events <- data.frame(start_time=as.POSIXct(character()),
end_time=as.POSIXct(character()), group=factor(), HHID=factor(),
logger_id=factor(),stove=factor(), logging_duration_days=as.numeric(),
datetime_placed = as.POSIXct(character()), datetime_removal = as.POSIXct(character()),
file_indices = as.numeric(), filename=character(),fullsumsarizer_filename=character(),comments=character(),
qc_dates=character(),use_flag = as.logical())
#for each SUM data file
for (i in unique(sumsarized_filtered$file_indices)) {
#Grab data from file i, and keep only the entries that are marked as cooking
temp <- dplyr::filter(sumsarized_filtered,file_indices == i) %>%
dplyr::filter(state == TRUE) %>% dplyr::filter(!duplicated(datetime))
qc_temp <- "ok" #Default to ok data quality, gets demoted based on later checks.
if (dim(temp)[1]>1 && any(temp$state==TRUE)) {
min_time_file = min(temp$datetime)
max_time_file = max(temp$datetime)
time_difference <- as.numeric(difftime(temp$datetime,lag(temp$datetime),units="mins"))
time_difference <- time_difference[!is.na(time_difference)] #
breakstart <- c(0,which((time_difference>cooking_group) == TRUE))+1 #Start of a cooking event
breakend <- c(which((time_difference>cooking_group) == TRUE),
if (tail(temp$state,n=1) == TRUE){dim(temp)[1]}) #End of cooking event.
#Tail part is in case the time series ends while still cooking...need to account for th
#Organize/check/fix datetimes of sum placements and removals. If there is a start and end time available for the monitoring period, use it to keep the data from the file. Disregard this if the datetime_removal is NA, since it means we don't have a fixed known end date. In this case we assume the end of the file is the end of the monitoring preiod and keep all the data from the given file. Provide the start and end date in the event-building loop below.
if (max_time_file>end_date_range | min_time_file<start_date_range) { #datetime_placed is changed to the local file value if it is below the start_date_range, and datetime_placed is too, according to the end_date_range
datetime_placed_temp = min_time_file
datetime_removal_temp = max_time_file
qc_temp <- "out_of_placement_range" #These get filtered out, assuming it represents poorly time formatted data.
} else if (is.na(as.POSIXct(temp$datetime_removal[1])) | is.na(as.POSIXct(temp$datetime_placed[1]))){ #If date is NA, use the file's dates. They have already been midnight-trimmed in the load_data.r function.
datetime_placed_temp = min_time_file
datetime_removal_temp = max_time_file
qc_temp <- "NA_metadata"
} else if (min_time_file> as.POSIXct(temp$datetime_placed[1])) { #If placement time is before time of first data point, switch to first data point.
datetime_placed_temp = min_time_file
datetime_removal_temp = max_time_file
qc_temp <- "placement_before_data"
} else if (as.POSIXct(min(temp$datetime_placed))> as.POSIXct(max(temp$datetime_removal))) { #If placement time is greater than the datetime placed, switch them, it's a mistake.
datetime_placed_temp = min_time_file
datetime_removal_temp = max_time_file
qc_temp <- "placement_greaterthan_removal"
} else { #If not NA, a value must have been found in the meta data, use it.
datetime_placed_temp = as.POSIXct(min(temp$datetime_placed))
datetime_removal_temp = as.POSIXct(max(temp$datetime_removal))
qc_temp <- "metadata_dates_used"
}
#Add cooking events to the cooking_events data frame.
cooking_events <- rbind(cooking_events,
data.frame(start_time= as.POSIXct(temp$datetime[breakstart]),
end_time=as.POSIXct(temp$datetime[breakend]),
group=as.factor(temp$group[breakstart]),
HHID=as.factor(temp$HHID[breakstart]),
logger_id=factor(temp$logger_id[breakstart]),
stove=factor(temp$stove[breakstart]),
logging_duration_days = as.numeric(difftime(datetime_removal_temp,datetime_placed_temp,units = "days")),
datetime_placed = datetime_placed_temp,
datetime_removal = datetime_removal_temp,
file_indices = temp$file_indices[breakstart],
filename = temp$filename[breakstart],
fullsumsarizer_filename = temp$fullsumsarizer_filename[breakstart],
comments = temp$comments[1],
use_flag=as.logical(rep(1,length(breakstart)[1])),
qc_dates = qc_temp))
} else{
#If no cooking events are found, still create an entry, though with the use flag as FALSE, so
#that we know that there was data collected and zero events in that period. Set the use_flag to true here to differntiate from the too-short cooking events.
temp <- dplyr::filter(sumsarized_filtered,file_indices == i) #Create new temp here so
#we can get info about the sample that does not have cooking events.
cooking_events <- rbind(cooking_events,
data.frame(start_time=as.POSIXct(temp$datetime[1]),
end_time=as.POSIXct(temp$datetime[1]),
group=as.factor(temp$group[1]),
HHID=as.factor(temp$HHID[1]),
logger_id=factor(temp$logger_id[1]),
stove=factor(temp$stove[1]),
logging_duration_days = as.numeric(difftime(max(temp$datetime),min(temp$datetime),units = "days")),
datetime_placed = min(temp$datetime),
datetime_removal = max(temp$datetime),
file_indices = temp$file_indices[1],
filename = temp$filename[1],
fullsumsarizer_filename = temp$fullsumsarizer_filename[1],
comments = temp$comments[1],
qc_dates = qc_temp,
use_flag=FALSE))
}
}
#Clean up cooking events. Add better variable names for stoves, and units.
cooking_events <- dplyr::left_join(cooking_events,
dplyr::select(stove_codes,stove,stove_descriptions),by = "stove") %>%
dplyr::mutate(units = "degrees Celsius") %>% #Define units as degrees C
dplyr::mutate(duration_minutes = as.numeric(difftime(end_time,start_time,units = "mins"))) %>%
dplyr::mutate(qc = if_else(grepl(bad_files,fullsumsarizer_filename,ignore.case=TRUE),"bad","ok")) %>% #Flag data from bad files
dplyr::mutate(qc = if_else(grepl(HHID_remove,HHID),"bad",qc)) %>% #Flag data from HHID's that should be removed.
dplyr::mutate(qc = if_else(grepl("amb",fullsumsarizer_filename,ignore.case=TRUE),"bad",qc)) #Flag data from HHID's that should be removed.
Remove data from bad files:
- merge flags with data *Perform filtering on cooking events variable to get to the subset we want.
#Remove short events, while keeping entries from files with no events.
ok_cooking_events <- dplyr::filter(cooking_events,!is.na(stove_descriptions)) %>%
dplyr::filter(logging_duration_days >= logging_duration_minimum) %>% # Only keep files longer than a day.
#Filter out events shorter than the threshold, while keeping the entries that
#have no uses in the deployments. At this point all events except empty files have use_flag = TRUE.
dplyr::filter(!grepl("out_of_placement_range",qc_dates)) %>%#Keep data not marked as placed out of range
dplyr::select(filename,fullsumsarizer_filename,start_time,end_time,duration_minutes,
HHID,stove,logger_id,group,
stove_descriptions,stove,logging_duration_days,datetime_placed,
datetime_removal, units,comments,use_flag,qc) %>%
dplyr::mutate(start_hour = hour(start_time) + minute(start_time)/60) %>%
dplyr::mutate(month_year = format(start_time,"%b-%y")) %>%
dplyr::mutate(month_year = factor(month_year, unique(month_year), ordered=TRUE)) %>%
dplyr::mutate(day_month_year = as.Date(start_time)) %>%
dplyr::mutate(week_year = format(start_time,"%V-%y")) %>%
dplyr::mutate(day_of_week = as.factor(weekdays(start_time))) %>%
dplyr::mutate(day_of_week = factor(day_of_week,
levels = c('Monday','Tuesday','Wednesday','Thursday',
'Friday','Saturday','Sunday'))) %>%
dplyr::arrange(desc(datetime_removal)) %>% #Sort so we toss the earlier end dates when deleting distinct values.
dplyr::distinct(start_time,duration_minutes,HHID,stove_descriptions, .keep_all = TRUE) %>%
#Handle cases that have duplicated events due to overlapping files. Keep only distinct ones
dplyr::mutate(datetime_placed = floor_date(datetime_placed,"minute")) %>%
dplyr::mutate(qc = if_else(logging_duration_days < total_days_logged_minimum,"bad",qc)) %>% # Only keep data from house-stove combos with more than this number of days of data.
dplyr::arrange(HHID) %>%
dplyr::filter(!is.na(HHID)) %>%
dplyr::filter(!is.na(datetime_placed)) %>%
dplyr::filter(grepl("ok",qc)) %>% #Keep only data marked 'ok'
droplevels() #Getting rid of extra levels whos data was filtered out, so unique doesn't freak out.
ok_cooking_events_padded <- data.frame(stringsAsFactors = FALSE)
#Replace NA with 0.
#dplyr_if_else <- function(x) { mutate_all(x, funs(ifelse(is.na(.), 0, .))) }
uniquers <- unique(ok_cooking_events$fullsumsarizer_filename)
for (i in 1:length(unique(ok_cooking_events$filename))) {
temp <- dplyr::filter(ok_cooking_events,uniquers[i]==fullsumsarizer_filename) %>%
dplyr::arrange(start_time)
if (dim(temp)[1]>0) {
# generate a time sequence with 1 day intervals to fill in
# missing dates
all.dates <- data.frame(dates = seq(temp$datetime_placed[1], temp$datetime_removal[1], by="day"),stringsAsFactors = FALSE) %>%
dplyr::mutate(day_month_year = as.Date(dates)) %>%
dplyr::filter(!day_month_year %in% temp$day_month_year) #Get rid of extra days
# Convert all dates to a data frame. Note that we're putting
# the new dates into a column called "start_time" just like the
# original column. This will allow us to merge the data.
all.dates.frame <- data.frame(list(start_time=all.dates$dates),list(end_time=all.dates$dates),
list(day_month_year = as.Date(all.dates$dates)),
list(week_year = format(all.dates$dates,"%V-%y")),
list(day_of_week = weekdays(all.dates$dates)),stringsAsFactors = FALSE) %>%
dplyr::mutate(month_year = format(start_time,"%b-%y")) %>%
dplyr::mutate(month_year = factor(month_year, unique(month_year), ordered=TRUE))
# Merge the two datasets: the full dates and original data
merged.data <- merge(all.dates.frame, temp, all=T) %>%
tidyr::fill(filename,fullsumsarizer_filename,HHID,stove,logger_id,group,
stove_descriptions,logging_duration_days,datetime_placed,
datetime_removal,units,comments,
start_hour,.direction = c("up")) %>%
tidyr::fill(filename,fullsumsarizer_filename,HHID,stove,logger_id,group,
stove_descriptions,logging_duration_days,datetime_placed,
datetime_removal,units,comments,
start_hour,.direction = c("down")) %>%
dplyr::mutate(use_flag = replace(use_flag, is.na(use_flag), FALSE))
merged.data %>% mutate_if(is.factor, as.character) -> merged.data
merged.data$use_flag[is.na(merged.data$use_flag)] <- FALSE
merged.data[is.na(merged.data)] <- 0
# The above merge set the new observations to NA.
# To replace those with a 0, we must first find all the rows
# and then assign 0 to them.
#merged.data <-dplyr_if_else(merged.data)
ok_cooking_events_padded <- rbind(ok_cooking_events_padded,merged.data)
}
}
#time format was messed up somehow in making the padded set, correct it here.
ok_cooking_events_padded <- dplyr::mutate(ok_cooking_events_padded,datetime_removal = as.POSIXct(datetime_removal,origin = "1970-1-1", tz = "GMT")) %>%
dplyr::arrange((start_time)) %>%
dplyr::mutate(month_year = format(start_time,"%b-%y")) %>%
dplyr::mutate(month_year = factor(month_year, unique(month_year), ordered=TRUE)) %>%
dplyr::mutate(qc = "ok") #New qc values default to zero during padding, but they are all already qc-filtered, so reset to TRUE
#Finish filtering the dataset. Kept 'bad' and short events to get the full padded dataset. Now can remove them
ok_cooking_events <- dplyr::filter(ok_cooking_events,duration_minutes>cooking_duration_minimum | use_flag==FALSE)
Summarize data
ok_datasummary <- summarise_all(ok_cooking_events,funs(n_distinct(.)))
#Summarize use for each household and stove.
stats_grouped_by_hhid_stove <- dplyr::group_by(ok_cooking_events_padded,stove_descriptions,HHID,group) %>%
# calculate events per day for each household and stove. Can then take summary stats of those.
dplyr::summarise(events_per_day=sum(use_flag,na.rm=TRUE)/length(unique(day_month_year)),
minutes_per_day_stove_hhid = sum(duration_minutes,na.rm=TRUE)/length(unique(day_month_year)),
minutes_per_event_stove_hhid = sum(duration_minutes,na.rm=TRUE)/sum(use_flag,na.rm=TRUE),
days_logged_per_group_by_stove=sum(length(unique(day_month_year)), na.rm = TRUE),
events = n())
kable(stats_grouped_by_hhid_stove, digits=2)
| CleanCook |
AKO_01B |
AKO |
0.00 |
0.00 |
NaN |
13 |
13 |
| CleanCook |
AKO_02B |
AKO |
1.46 |
33.85 |
23.16 |
13 |
25 |
| CleanCook |
AKO_03B |
AKO |
3.38 |
78.46 |
23.18 |
13 |
44 |
| CleanCook |
AKO_04 |
AKO |
0.32 |
8.74 |
27.67 |
95 |
109 |
| CleanCook |
AKO_05B |
AKO |
0.75 |
38.75 |
51.67 |
8 |
10 |
| CleanCook |
AKO_06B |
AKO |
0.00 |
0.00 |
NaN |
13 |
13 |
| CleanCook |
AKO_09 |
AKO |
0.59 |
26.76 |
45.50 |
34 |
43 |
| CleanCook |
AKO_10 |
AKO |
1.31 |
36.77 |
28.15 |
62 |
106 |
| CleanCook |
MUS_02 |
MUS |
0.33 |
7.53 |
22.59 |
81 |
95 |
| CleanCook |
MUS_03 |
MUS |
0.20 |
2.63 |
13.33 |
76 |
83 |
| CleanCook |
MUS_04 |
MUS |
0.35 |
8.02 |
23.00 |
86 |
98 |
| CleanCook |
MUS_05 |
MUS |
1.41 |
40.26 |
28.55 |
39 |
68 |
| CleanCook |
MUS_06 |
MUS |
0.68 |
57.88 |
84.79 |
104 |
157 |
| CleanCook |
MUS_07 |
MUS |
1.21 |
42.42 |
35.04 |
95 |
145 |
| CleanCook |
MUS_08 |
MUS |
0.75 |
18.67 |
25.00 |
83 |
120 |
| CleanCook |
MUS_09 |
MUS |
1.70 |
45.83 |
26.89 |
115 |
224 |
| CleanCook |
MUS_10 |
MUS |
1.11 |
30.57 |
27.44 |
70 |
129 |
| CleanCook |
SHO_01 |
SHO |
0.94 |
27.57 |
29.24 |
70 |
99 |
| CleanCook |
SHO_02 |
SHO |
0.83 |
24.33 |
29.20 |
60 |
81 |
| CleanCook |
SHO_05 |
SHO |
0.70 |
15.06 |
21.64 |
79 |
115 |
| CleanCook |
SHO_06 |
SHO |
0.45 |
11.63 |
26.10 |
92 |
108 |
| CleanCook |
SHO_07 |
SHO |
0.90 |
29.20 |
32.44 |
50 |
63 |
| CleanCook |
SHO_09 |
SHO |
0.36 |
6.43 |
18.00 |
70 |
81 |
| Kerosene |
AKO_01B |
AKO |
1.46 |
74.62 |
51.05 |
13 |
20 |
| Kerosene |
AKO_02B |
AKO |
0.00 |
0.00 |
NaN |
13 |
13 |
| Kerosene |
AKO_03B |
AKO |
1.85 |
186.92 |
101.25 |
13 |
24 |
| Kerosene |
AKO_04 |
AKO |
0.90 |
47.74 |
52.86 |
93 |
119 |
| Kerosene |
AKO_05B |
AKO |
0.42 |
20.83 |
50.00 |
12 |
12 |
| Kerosene |
AKO_06B |
AKO |
1.08 |
73.08 |
67.86 |
13 |
17 |
| Kerosene |
AKO_09 |
AKO |
0.74 |
52.80 |
71.35 |
50 |
72 |
| Kerosene |
AKO_10 |
AKO |
2.36 |
176.94 |
74.94 |
108 |
262 |
| Kerosene |
MUS_02 |
MUS |
3.12 |
188.31 |
60.34 |
124 |
389 |
| Kerosene |
MUS_03 |
MUS |
0.75 |
59.50 |
79.33 |
20 |
28 |
| Kerosene |
MUS_04 |
MUS |
0.86 |
60.63 |
70.44 |
79 |
99 |
| Kerosene |
MUS_05 |
MUS |
2.05 |
78.64 |
38.44 |
66 |
142 |
| Kerosene |
MUS_06 |
MUS |
2.24 |
233.77 |
104.56 |
106 |
257 |
| Kerosene |
MUS_07 |
MUS |
0.69 |
35.57 |
51.67 |
61 |
78 |
| Kerosene |
MUS_08 |
MUS |
2.03 |
121.39 |
59.86 |
72 |
156 |
| Kerosene |
MUS_09 |
MUS |
1.28 |
67.72 |
52.72 |
123 |
185 |
| Kerosene |
MUS_10 |
MUS |
2.58 |
180.33 |
70.00 |
92 |
273 |
| Kerosene |
SHO_01 |
SHO |
1.86 |
113.33 |
61.03 |
84 |
165 |
| Kerosene |
SHO_02 |
SHO |
1.76 |
159.73 |
90.76 |
75 |
141 |
| Kerosene |
SHO_05 |
SHO |
3.89 |
274.78 |
70.61 |
46 |
179 |
| Kerosene |
SHO_06 |
SHO |
0.00 |
0.00 |
NaN |
84 |
84 |
| Kerosene |
SHO_07 |
SHO |
0.12 |
7.50 |
60.00 |
32 |
32 |
| Kerosene |
SHO_09 |
SHO |
1.39 |
75.42 |
54.27 |
59 |
94 |
| LPG |
AKO_04 |
AKO |
1.14 |
31.40 |
27.55 |
93 |
136 |
| LPG |
SHO_01 |
SHO |
0.35 |
7.82 |
22.63 |
55 |
62 |
| LPG |
SHO_02 |
SHO |
0.00 |
0.00 |
NaN |
9 |
9 |
| LPG |
SHO_05 |
SHO |
0.63 |
18.96 |
30.24 |
67 |
80 |
| LPG |
SHO_06 |
SHO |
1.03 |
44.05 |
42.89 |
74 |
103 |
#Calculate summary for the overall groups and stoves from the household-wise results above. Should this be weighted in case one hh has way more data?
stats_by_stove_group <- dplyr::group_by(stats_grouped_by_hhid_stove,stove_descriptions,group) %>%
dplyr::summarise(mean_events_per_day=mean(events_per_day, na.rm = TRUE),
median_events_per_day=median(events_per_day, na.rm = TRUE),
sd_events_per_day=sd(events_per_day, na.rm = TRUE),
mean_minutes_per_day_stove_hhid=mean(minutes_per_day_stove_hhid, na.rm = TRUE),
median_minutes_per_day_stove_hhid=median(minutes_per_day_stove_hhid, na.rm = TRUE),
sd_minutes_per_day_stove_hhid=sd(minutes_per_day_stove_hhid, na.rm = TRUE),
mean_days_logged=mean(days_logged_per_group_by_stove, na.rm = TRUE),
house_stoves=n()) %>%
dplyr::filter(!is.na(stove_descriptions))
kable(stats_by_stove_group, digits=2)
| CleanCook |
AKO |
0.98 |
0.67 |
1.11 |
27.92 |
30.31 |
25.95 |
31.38 |
8 |
| CleanCook |
MUS |
0.86 |
0.75 |
0.53 |
28.20 |
30.57 |
19.76 |
83.22 |
9 |
| CleanCook |
SHO |
0.70 |
0.76 |
0.24 |
19.04 |
19.70 |
9.31 |
70.17 |
6 |
| Kerosene |
AKO |
1.10 |
0.99 |
0.77 |
79.12 |
62.94 |
68.23 |
39.38 |
8 |
| Kerosene |
MUS |
1.73 |
2.03 |
0.87 |
113.98 |
78.64 |
70.43 |
82.56 |
9 |
| Kerosene |
SHO |
1.50 |
1.57 |
1.42 |
105.13 |
94.38 |
103.24 |
63.33 |
6 |
| LPG |
AKO |
1.14 |
1.14 |
NA |
31.40 |
31.40 |
NA |
93.00 |
1 |
| LPG |
SHO |
0.50 |
0.49 |
0.44 |
17.71 |
13.39 |
19.21 |
51.25 |
4 |
#Calculate summary for the overall stoves from the household-wise results above. Should this be weighted in case one hh has way more data?
#Same as above but without the group, just by stove.
stats_by_stove <- dplyr::group_by(stats_grouped_by_hhid_stove,stove_descriptions) %>%
dplyr::summarise(mean_events_per_day=mean(events_per_day, na.rm = TRUE),
median_events_per_day=median(events_per_day, na.rm = TRUE),
sd_events_per_day=sd(events_per_day, na.rm = TRUE),
mean_minutes_per_day_stove_hhid=mean(minutes_per_day_stove_hhid, na.rm = TRUE),
median_minutes_per_day_stove_hhid=median(minutes_per_day_stove_hhid, na.rm = TRUE),
sd_minutes_per_day_stove_hhid=sd(minutes_per_day_stove_hhid, na.rm = TRUE),
house_stoves=n()) %>%
dplyr::filter(!is.na(stove_descriptions))
kable(stats_by_stove, digits=2)
| CleanCook |
0.86 |
0.75 |
0.72 |
25.71 |
26.76 |
19.81 |
23 |
| Kerosene |
1.45 |
1.39 |
1.00 |
99.55 |
74.62 |
77.16 |
23 |
| LPG |
0.63 |
0.63 |
0.47 |
20.45 |
18.96 |
17.73 |
5 |
#Summarize use for each household and stove, by month
stats_grouped_by_hhid_stove_month <- dplyr::group_by(ok_cooking_events_padded,stove_descriptions,HHID,group,month_year) %>%
# calculate events per day for each household and stove. Can then take summary stats of those.
dplyr::summarise(events_per_day=sum(use_flag,na.rm=TRUE)/length(unique(day_month_year)),
minutes_per_day_stove_hhid = sum(duration_minutes,na.rm=TRUE)/length(unique(day_month_year)),
minutes_per_event_stove_hhid = sum(duration_minutes,na.rm=TRUE)/sum(use_flag,na.rm=TRUE),
days_logged_per_group_by_stove=sum(length(unique(day_month_year)), na.rm = TRUE))
kable(stats_grouped_by_hhid_stove_month, digits=2)
| CleanCook |
AKO_01B |
AKO |
Jan-18 |
0.00 |
0.00 |
NaN |
11 |
| CleanCook |
AKO_01B |
AKO |
Feb-18 |
0.00 |
0.00 |
NaN |
2 |
| CleanCook |
AKO_02B |
AKO |
Jan-18 |
1.27 |
39.09 |
30.71 |
11 |
| CleanCook |
AKO_02B |
AKO |
Feb-18 |
2.50 |
5.00 |
2.00 |
2 |
| CleanCook |
AKO_03B |
AKO |
Jan-18 |
3.27 |
77.27 |
23.61 |
11 |
| CleanCook |
AKO_03B |
AKO |
Feb-18 |
4.00 |
85.00 |
21.25 |
2 |
| CleanCook |
AKO_04 |
AKO |
Oct-17 |
0.63 |
10.53 |
16.67 |
19 |
| CleanCook |
AKO_04 |
AKO |
Nov-17 |
0.00 |
0.00 |
NaN |
12 |
| CleanCook |
AKO_04 |
AKO |
Dec-17 |
0.00 |
0.00 |
NaN |
22 |
| CleanCook |
AKO_04 |
AKO |
Jan-18 |
0.00 |
0.00 |
NaN |
30 |
| CleanCook |
AKO_04 |
AKO |
Feb-18 |
1.50 |
52.50 |
35.00 |
12 |
| CleanCook |
AKO_05B |
AKO |
Jan-18 |
0.67 |
41.67 |
62.50 |
6 |
| CleanCook |
AKO_05B |
AKO |
Feb-18 |
1.00 |
30.00 |
30.00 |
2 |
| CleanCook |
AKO_06B |
AKO |
Jan-18 |
0.00 |
0.00 |
NaN |
11 |
| CleanCook |
AKO_06B |
AKO |
Feb-18 |
0.00 |
0.00 |
NaN |
2 |
| CleanCook |
AKO_09 |
AKO |
Nov-17 |
0.00 |
0.00 |
NaN |
1 |
| CleanCook |
AKO_09 |
AKO |
Dec-17 |
0.68 |
36.84 |
53.85 |
19 |
| CleanCook |
AKO_09 |
AKO |
Feb-18 |
0.50 |
15.00 |
30.00 |
14 |
| CleanCook |
AKO_10 |
AKO |
Oct-17 |
2.56 |
86.67 |
33.91 |
9 |
| CleanCook |
AKO_10 |
AKO |
Nov-17 |
0.00 |
0.00 |
NaN |
1 |
| CleanCook |
AKO_10 |
AKO |
Dec-17 |
1.04 |
21.92 |
21.11 |
26 |
| CleanCook |
AKO_10 |
AKO |
Jan-18 |
1.42 |
35.00 |
24.71 |
12 |
| CleanCook |
AKO_10 |
AKO |
Feb-18 |
1.00 |
36.43 |
36.43 |
14 |
| CleanCook |
MUS_02 |
MUS |
Oct-17 |
1.27 |
32.73 |
25.71 |
11 |
| CleanCook |
MUS_02 |
MUS |
Nov-17 |
0.62 |
11.90 |
19.23 |
21 |
| CleanCook |
MUS_02 |
MUS |
Dec-17 |
0.00 |
0.00 |
NaN |
31 |
| CleanCook |
MUS_02 |
MUS |
Jan-18 |
0.00 |
0.00 |
NaN |
18 |
| CleanCook |
MUS_03 |
MUS |
Oct-17 |
0.70 |
11.00 |
15.71 |
10 |
| CleanCook |
MUS_03 |
MUS |
Nov-17 |
0.80 |
9.00 |
11.25 |
10 |
| CleanCook |
MUS_03 |
MUS |
Dec-17 |
0.00 |
0.00 |
NaN |
19 |
| CleanCook |
MUS_03 |
MUS |
Jan-18 |
0.00 |
0.00 |
NaN |
31 |
| CleanCook |
MUS_03 |
MUS |
Feb-18 |
0.00 |
0.00 |
NaN |
6 |
| CleanCook |
MUS_04 |
MUS |
Oct-17 |
0.94 |
24.38 |
26.00 |
16 |
| CleanCook |
MUS_04 |
MUS |
Nov-17 |
0.94 |
18.75 |
20.00 |
16 |
| CleanCook |
MUS_04 |
MUS |
Dec-17 |
0.00 |
0.00 |
NaN |
22 |
| CleanCook |
MUS_04 |
MUS |
Jan-18 |
0.00 |
0.00 |
NaN |
30 |
| CleanCook |
MUS_04 |
MUS |
Feb-18 |
0.00 |
0.00 |
NaN |
2 |
| CleanCook |
MUS_05 |
MUS |
Oct-17 |
2.00 |
45.00 |
22.50 |
2 |
| CleanCook |
MUS_05 |
MUS |
Nov-17 |
1.38 |
36.25 |
26.36 |
24 |
| CleanCook |
MUS_05 |
MUS |
Dec-17 |
1.38 |
46.92 |
33.89 |
13 |
| CleanCook |
MUS_06 |
MUS |
Oct-17 |
4.00 |
361.25 |
90.31 |
16 |
| CleanCook |
MUS_06 |
MUS |
Nov-17 |
0.00 |
0.00 |
NaN |
28 |
| CleanCook |
MUS_06 |
MUS |
Dec-17 |
0.37 |
12.63 |
34.29 |
19 |
| CleanCook |
MUS_06 |
MUS |
Jan-18 |
0.00 |
0.00 |
NaN |
24 |
| CleanCook |
MUS_06 |
MUS |
Feb-18 |
0.00 |
0.00 |
NaN |
17 |
| CleanCook |
MUS_07 |
MUS |
Oct-17 |
0.73 |
30.00 |
40.91 |
15 |
| CleanCook |
MUS_07 |
MUS |
Nov-17 |
1.17 |
32.50 |
27.86 |
24 |
| CleanCook |
MUS_07 |
MUS |
Dec-17 |
1.28 |
49.31 |
38.65 |
29 |
| CleanCook |
MUS_07 |
MUS |
Jan-18 |
1.76 |
63.33 |
35.95 |
21 |
| CleanCook |
MUS_07 |
MUS |
Feb-18 |
0.33 |
6.67 |
20.00 |
6 |
| CleanCook |
MUS_08 |
MUS |
Oct-17 |
0.75 |
19.00 |
25.33 |
20 |
| CleanCook |
MUS_08 |
MUS |
Nov-17 |
0.00 |
0.00 |
NaN |
28 |
| CleanCook |
MUS_08 |
MUS |
Dec-17 |
0.83 |
17.83 |
21.58 |
23 |
| CleanCook |
MUS_08 |
MUS |
Jan-18 |
1.00 |
7.50 |
7.50 |
4 |
| CleanCook |
MUS_08 |
MUS |
Feb-18 |
3.00 |
91.25 |
30.42 |
8 |
| CleanCook |
MUS_09 |
MUS |
Oct-17 |
2.90 |
75.00 |
25.86 |
20 |
| CleanCook |
MUS_09 |
MUS |
Nov-17 |
1.45 |
46.36 |
31.88 |
22 |
| CleanCook |
MUS_09 |
MUS |
Dec-17 |
1.45 |
31.29 |
21.56 |
31 |
| CleanCook |
MUS_09 |
MUS |
Jan-18 |
1.15 |
38.46 |
33.33 |
26 |
| CleanCook |
MUS_09 |
MUS |
Feb-18 |
1.94 |
48.75 |
25.16 |
16 |
| CleanCook |
MUS_10 |
MUS |
Oct-17 |
2.25 |
75.00 |
33.33 |
4 |
| CleanCook |
MUS_10 |
MUS |
Nov-17 |
0.95 |
27.00 |
28.42 |
20 |
| CleanCook |
MUS_10 |
MUS |
Dec-17 |
0.67 |
17.62 |
26.43 |
21 |
| CleanCook |
MUS_10 |
MUS |
Jan-18 |
1.44 |
37.20 |
25.83 |
25 |
| CleanCook |
SHO_01 |
SHO |
Oct-17 |
1.33 |
39.05 |
29.29 |
21 |
| CleanCook |
SHO_01 |
SHO |
Nov-17 |
0.94 |
33.89 |
35.88 |
18 |
| CleanCook |
SHO_01 |
SHO |
Dec-17 |
0.77 |
25.38 |
33.00 |
13 |
| CleanCook |
SHO_01 |
SHO |
Jan-18 |
0.00 |
0.00 |
NaN |
6 |
| CleanCook |
SHO_01 |
SHO |
Feb-18 |
0.92 |
14.17 |
15.45 |
12 |
| CleanCook |
SHO_02 |
SHO |
Oct-17 |
1.29 |
38.57 |
30.00 |
7 |
| CleanCook |
SHO_02 |
SHO |
Nov-17 |
0.86 |
26.67 |
31.11 |
21 |
| CleanCook |
SHO_02 |
SHO |
Dec-17 |
1.08 |
25.38 |
23.57 |
13 |
| CleanCook |
SHO_02 |
SHO |
Jan-18 |
0.53 |
17.65 |
33.33 |
17 |
| CleanCook |
SHO_02 |
SHO |
Feb-18 |
0.00 |
0.00 |
NaN |
2 |
| CleanCook |
SHO_05 |
SHO |
Oct-17 |
0.83 |
50.00 |
60.00 |
6 |
| CleanCook |
SHO_05 |
SHO |
Nov-17 |
0.77 |
16.00 |
20.87 |
30 |
| CleanCook |
SHO_05 |
SHO |
Dec-17 |
0.30 |
7.33 |
24.44 |
30 |
| CleanCook |
SHO_05 |
SHO |
Jan-18 |
2.00 |
27.14 |
13.57 |
7 |
| CleanCook |
SHO_05 |
SHO |
Feb-18 |
0.67 |
0.00 |
0.00 |
6 |
| CleanCook |
SHO_06 |
SHO |
Oct-17 |
0.81 |
14.76 |
18.24 |
21 |
| CleanCook |
SHO_06 |
SHO |
Nov-17 |
0.63 |
22.22 |
35.29 |
27 |
| CleanCook |
SHO_06 |
SHO |
Dec-17 |
0.25 |
5.71 |
22.86 |
28 |
| CleanCook |
SHO_06 |
SHO |
Jan-18 |
0.00 |
0.00 |
NaN |
3 |
| CleanCook |
SHO_06 |
SHO |
Feb-18 |
0.00 |
0.00 |
NaN |
13 |
| CleanCook |
SHO_07 |
SHO |
Oct-17 |
0.71 |
17.86 |
25.00 |
14 |
| CleanCook |
SHO_07 |
SHO |
Nov-17 |
1.00 |
30.00 |
30.00 |
4 |
| CleanCook |
SHO_07 |
SHO |
Dec-17 |
0.57 |
14.29 |
25.00 |
7 |
| CleanCook |
SHO_07 |
SHO |
Jan-18 |
1.67 |
69.17 |
41.50 |
12 |
| CleanCook |
SHO_07 |
SHO |
Feb-18 |
0.54 |
12.31 |
22.86 |
13 |
| CleanCook |
SHO_09 |
SHO |
Oct-17 |
0.50 |
12.73 |
25.45 |
22 |
| CleanCook |
SHO_09 |
SHO |
Nov-17 |
0.00 |
0.00 |
NaN |
15 |
| CleanCook |
SHO_09 |
SHO |
Dec-17 |
0.50 |
7.14 |
14.29 |
14 |
| CleanCook |
SHO_09 |
SHO |
Jan-18 |
0.28 |
3.89 |
14.00 |
18 |
| CleanCook |
SHO_09 |
SHO |
Feb-18 |
2.00 |
0.00 |
0.00 |
1 |
| Kerosene |
AKO_01B |
AKO |
Jan-18 |
1.55 |
82.73 |
53.53 |
11 |
| Kerosene |
AKO_01B |
AKO |
Feb-18 |
1.00 |
30.00 |
30.00 |
2 |
| Kerosene |
AKO_02B |
AKO |
Jan-18 |
0.00 |
0.00 |
NaN |
11 |
| Kerosene |
AKO_02B |
AKO |
Feb-18 |
0.00 |
0.00 |
NaN |
2 |
| Kerosene |
AKO_03B |
AKO |
Jan-18 |
1.91 |
195.45 |
102.38 |
11 |
| Kerosene |
AKO_03B |
AKO |
Feb-18 |
1.50 |
140.00 |
93.33 |
2 |
| Kerosene |
AKO_04 |
AKO |
Oct-17 |
0.81 |
39.38 |
48.46 |
16 |
| Kerosene |
AKO_04 |
AKO |
Nov-17 |
0.73 |
34.55 |
47.50 |
11 |
| Kerosene |
AKO_04 |
AKO |
Dec-17 |
0.57 |
28.70 |
50.77 |
23 |
| Kerosene |
AKO_04 |
AKO |
Jan-18 |
1.28 |
63.10 |
49.46 |
29 |
| Kerosene |
AKO_04 |
AKO |
Feb-18 |
0.93 |
67.14 |
72.31 |
14 |
| Kerosene |
AKO_05B |
AKO |
Jan-18 |
0.36 |
20.00 |
55.00 |
11 |
| Kerosene |
AKO_05B |
AKO |
Feb-18 |
1.00 |
30.00 |
30.00 |
1 |
| Kerosene |
AKO_06B |
AKO |
Jan-18 |
1.09 |
70.00 |
64.17 |
11 |
| Kerosene |
AKO_06B |
AKO |
Feb-18 |
1.00 |
90.00 |
90.00 |
2 |
| Kerosene |
AKO_09 |
AKO |
Oct-17 |
1.90 |
135.00 |
71.05 |
10 |
| Kerosene |
AKO_09 |
AKO |
Nov-17 |
0.00 |
0.00 |
NaN |
11 |
| Kerosene |
AKO_09 |
AKO |
Dec-17 |
0.64 |
25.45 |
40.00 |
11 |
| Kerosene |
AKO_09 |
AKO |
Jan-18 |
0.69 |
63.12 |
91.82 |
16 |
| Kerosene |
AKO_09 |
AKO |
Feb-18 |
0.00 |
0.00 |
NaN |
2 |
| Kerosene |
AKO_10 |
AKO |
Oct-17 |
1.83 |
106.09 |
58.10 |
23 |
| Kerosene |
AKO_10 |
AKO |
Nov-17 |
3.55 |
225.45 |
63.59 |
11 |
| Kerosene |
AKO_10 |
AKO |
Dec-17 |
2.40 |
185.33 |
77.22 |
30 |
| Kerosene |
AKO_10 |
AKO |
Jan-18 |
2.33 |
216.00 |
92.57 |
30 |
| Kerosene |
AKO_10 |
AKO |
Feb-18 |
2.29 |
153.57 |
67.19 |
14 |
| Kerosene |
MUS_02 |
MUS |
Oct-17 |
2.71 |
124.29 |
45.79 |
21 |
| Kerosene |
MUS_02 |
MUS |
Nov-17 |
2.41 |
116.67 |
48.46 |
27 |
| Kerosene |
MUS_02 |
MUS |
Dec-17 |
3.58 |
276.77 |
77.30 |
31 |
| Kerosene |
MUS_02 |
MUS |
Jan-18 |
3.66 |
225.86 |
61.79 |
29 |
| Kerosene |
MUS_02 |
MUS |
Feb-18 |
3.00 |
153.75 |
51.25 |
16 |
| Kerosene |
MUS_03 |
MUS |
Oct-17 |
1.50 |
119.00 |
79.33 |
10 |
| Kerosene |
MUS_03 |
MUS |
Nov-17 |
0.00 |
0.00 |
NaN |
10 |
| Kerosene |
MUS_04 |
MUS |
Nov-17 |
0.33 |
12.22 |
36.67 |
18 |
| Kerosene |
MUS_04 |
MUS |
Dec-17 |
1.06 |
70.00 |
65.76 |
31 |
| Kerosene |
MUS_04 |
MUS |
Jan-18 |
0.97 |
80.00 |
82.76 |
30 |
| Kerosene |
MUS_05 |
MUS |
Oct-17 |
1.75 |
92.50 |
52.86 |
8 |
| Kerosene |
MUS_05 |
MUS |
Nov-17 |
1.58 |
70.42 |
44.47 |
24 |
| Kerosene |
MUS_05 |
MUS |
Dec-17 |
2.62 |
77.50 |
29.52 |
8 |
| Kerosene |
MUS_05 |
MUS |
Jan-18 |
2.42 |
82.50 |
34.14 |
24 |
| Kerosene |
MUS_05 |
MUS |
Feb-18 |
2.00 |
80.00 |
40.00 |
2 |
| Kerosene |
MUS_06 |
MUS |
Oct-17 |
4.40 |
454.00 |
103.18 |
10 |
| Kerosene |
MUS_06 |
MUS |
Nov-17 |
4.00 |
435.71 |
108.93 |
28 |
| Kerosene |
MUS_06 |
MUS |
Dec-17 |
1.96 |
200.74 |
102.26 |
27 |
| Kerosene |
MUS_06 |
MUS |
Jan-18 |
0.67 |
71.11 |
106.67 |
27 |
| Kerosene |
MUS_06 |
MUS |
Feb-18 |
0.71 |
50.00 |
70.00 |
14 |
| Kerosene |
MUS_07 |
MUS |
Oct-17 |
0.78 |
37.78 |
48.57 |
18 |
| Kerosene |
MUS_07 |
MUS |
Nov-17 |
1.80 |
120.00 |
66.67 |
5 |
| Kerosene |
MUS_07 |
MUS |
Dec-17 |
0.25 |
11.50 |
46.00 |
20 |
| Kerosene |
MUS_07 |
MUS |
Jan-18 |
0.80 |
28.00 |
35.00 |
5 |
| Kerosene |
MUS_07 |
MUS |
Feb-18 |
0.77 |
40.00 |
52.00 |
13 |
| Kerosene |
MUS_08 |
MUS |
Nov-17 |
2.08 |
120.83 |
58.00 |
12 |
| Kerosene |
MUS_08 |
MUS |
Dec-17 |
1.79 |
72.63 |
40.59 |
19 |
| Kerosene |
MUS_08 |
MUS |
Jan-18 |
2.40 |
165.60 |
69.00 |
25 |
| Kerosene |
MUS_08 |
MUS |
Feb-18 |
1.69 |
110.62 |
65.56 |
16 |
| Kerosene |
MUS_09 |
MUS |
Oct-17 |
1.22 |
3.48 |
2.86 |
23 |
| Kerosene |
MUS_09 |
MUS |
Nov-17 |
0.96 |
9.64 |
10.00 |
28 |
| Kerosene |
MUS_09 |
MUS |
Dec-17 |
1.67 |
98.67 |
59.20 |
30 |
| Kerosene |
MUS_09 |
MUS |
Jan-18 |
1.38 |
111.38 |
80.75 |
29 |
| Kerosene |
MUS_09 |
MUS |
Feb-18 |
1.00 |
137.69 |
137.69 |
13 |
| Kerosene |
MUS_10 |
MUS |
Oct-17 |
2.67 |
153.33 |
57.50 |
3 |
| Kerosene |
MUS_10 |
MUS |
Nov-17 |
2.80 |
211.67 |
75.60 |
30 |
| Kerosene |
MUS_10 |
MUS |
Dec-17 |
2.40 |
164.67 |
68.61 |
30 |
| Kerosene |
MUS_10 |
MUS |
Jan-18 |
2.41 |
162.22 |
67.38 |
27 |
| Kerosene |
MUS_10 |
MUS |
Feb-18 |
4.00 |
230.00 |
57.50 |
2 |
| Kerosene |
SHO_01 |
SHO |
Oct-17 |
2.11 |
144.44 |
68.42 |
18 |
| Kerosene |
SHO_01 |
SHO |
Nov-17 |
1.69 |
80.62 |
47.78 |
16 |
| Kerosene |
SHO_01 |
SHO |
Dec-17 |
1.88 |
135.00 |
72.00 |
16 |
| Kerosene |
SHO_01 |
SHO |
Jan-18 |
2.05 |
127.37 |
62.05 |
19 |
| Kerosene |
SHO_01 |
SHO |
Feb-18 |
1.47 |
70.00 |
47.73 |
15 |
| Kerosene |
SHO_02 |
SHO |
Oct-17 |
2.25 |
183.75 |
81.67 |
8 |
| Kerosene |
SHO_02 |
SHO |
Nov-17 |
2.57 |
225.71 |
87.78 |
21 |
| Kerosene |
SHO_02 |
SHO |
Dec-17 |
1.67 |
165.00 |
99.00 |
12 |
| Kerosene |
SHO_02 |
SHO |
Jan-18 |
1.47 |
130.53 |
88.57 |
19 |
| Kerosene |
SHO_02 |
SHO |
Feb-18 |
0.80 |
87.33 |
109.17 |
15 |
| Kerosene |
SHO_05 |
SHO |
Oct-17 |
4.00 |
235.00 |
58.75 |
2 |
| Kerosene |
SHO_05 |
SHO |
Nov-17 |
4.83 |
345.22 |
71.53 |
23 |
| Kerosene |
SHO_05 |
SHO |
Dec-17 |
2.17 |
136.67 |
63.08 |
6 |
| Kerosene |
SHO_05 |
SHO |
Jan-18 |
2.25 |
240.00 |
106.67 |
8 |
| Kerosene |
SHO_05 |
SHO |
Feb-18 |
4.14 |
212.86 |
51.38 |
7 |
| Kerosene |
SHO_06 |
SHO |
Oct-17 |
0.00 |
0.00 |
NaN |
24 |
| Kerosene |
SHO_06 |
SHO |
Nov-17 |
0.00 |
0.00 |
NaN |
28 |
| Kerosene |
SHO_06 |
SHO |
Dec-17 |
0.00 |
0.00 |
NaN |
19 |
| Kerosene |
SHO_06 |
SHO |
Feb-18 |
0.00 |
0.00 |
NaN |
13 |
| Kerosene |
SHO_07 |
SHO |
Oct-17 |
0.57 |
34.29 |
60.00 |
7 |
| Kerosene |
SHO_07 |
SHO |
Nov-17 |
0.00 |
0.00 |
NaN |
5 |
| Kerosene |
SHO_07 |
SHO |
Dec-17 |
0.00 |
0.00 |
NaN |
20 |
| Kerosene |
SHO_09 |
SHO |
Oct-17 |
1.90 |
103.50 |
54.47 |
20 |
| Kerosene |
SHO_09 |
SHO |
Nov-17 |
0.80 |
25.33 |
31.67 |
15 |
| Kerosene |
SHO_09 |
SHO |
Dec-17 |
0.67 |
30.00 |
45.00 |
3 |
| Kerosene |
SHO_09 |
SHO |
Jan-18 |
1.53 |
98.95 |
64.83 |
19 |
| Kerosene |
SHO_09 |
SHO |
Feb-18 |
0.50 |
15.00 |
30.00 |
2 |
| LPG |
AKO_04 |
AKO |
Oct-17 |
0.90 |
31.50 |
35.00 |
20 |
| LPG |
AKO_04 |
AKO |
Nov-17 |
1.00 |
20.00 |
20.00 |
1 |
| LPG |
AKO_04 |
AKO |
Dec-17 |
1.73 |
48.67 |
28.08 |
30 |
| LPG |
AKO_04 |
AKO |
Jan-18 |
1.00 |
26.07 |
26.07 |
28 |
| LPG |
AKO_04 |
AKO |
Feb-18 |
0.50 |
5.71 |
11.43 |
14 |
| LPG |
SHO_01 |
SHO |
Nov-17 |
0.43 |
12.86 |
30.00 |
14 |
| LPG |
SHO_01 |
SHO |
Dec-17 |
1.86 |
35.71 |
19.23 |
7 |
| LPG |
SHO_01 |
SHO |
Jan-18 |
0.00 |
0.00 |
NaN |
19 |
| LPG |
SHO_01 |
SHO |
Feb-18 |
0.00 |
0.00 |
NaN |
15 |
| LPG |
SHO_02 |
SHO |
Nov-17 |
0.00 |
0.00 |
NaN |
9 |
| LPG |
SHO_05 |
SHO |
Nov-17 |
0.64 |
6.82 |
10.71 |
22 |
| LPG |
SHO_05 |
SHO |
Dec-17 |
0.07 |
2.00 |
30.00 |
30 |
| LPG |
SHO_05 |
SHO |
Jan-18 |
2.00 |
68.75 |
34.38 |
8 |
| LPG |
SHO_05 |
SHO |
Feb-18 |
1.43 |
72.86 |
51.00 |
7 |
| LPG |
SHO_06 |
SHO |
Oct-17 |
1.33 |
59.33 |
44.50 |
15 |
| LPG |
SHO_06 |
SHO |
Nov-17 |
1.11 |
54.64 |
49.35 |
28 |
| LPG |
SHO_06 |
SHO |
Dec-17 |
1.39 |
46.67 |
33.60 |
18 |
| LPG |
SHO_06 |
SHO |
Feb-18 |
0.00 |
0.00 |
NaN |
13 |
#Calculate summary for the overall groups and stoves from the household-wise results above.
stats_by_stove_group_month <- dplyr::group_by(stats_grouped_by_hhid_stove_month,stove_descriptions,group,month_year) %>%
dplyr::summarise(mean_events_per_day=mean(events_per_day, na.rm = TRUE),
median_events_per_day=median(events_per_day, na.rm = TRUE),
sd_events_per_day=sd(events_per_day, na.rm = TRUE),
mean_minutes_per_day_stove_hhid=mean(minutes_per_day_stove_hhid, na.rm = TRUE),
median_minutes_per_day_stove_hhid=median(minutes_per_day_stove_hhid, na.rm = TRUE),
sd_minutes_per_day_stove_hhid=sd(minutes_per_day_stove_hhid, na.rm = TRUE),
households=n(),
mean_days_logged=mean(days_logged_per_group_by_stove, na.rm = TRUE))
kable(stats_by_stove_group_month, digits=2)
| CleanCook |
AKO |
Oct-17 |
1.59 |
1.59 |
1.36 |
48.60 |
48.60 |
53.84 |
2 |
14.00 |
| CleanCook |
AKO |
Nov-17 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
0.00 |
3 |
4.67 |
| CleanCook |
AKO |
Dec-17 |
0.57 |
0.68 |
0.53 |
19.59 |
21.92 |
18.53 |
3 |
22.33 |
| CleanCook |
AKO |
Jan-18 |
0.95 |
0.67 |
1.19 |
27.58 |
35.00 |
29.26 |
7 |
13.14 |
| CleanCook |
AKO |
Feb-18 |
1.31 |
1.00 |
1.36 |
27.99 |
22.50 |
29.74 |
8 |
6.25 |
| CleanCook |
MUS |
Oct-17 |
1.73 |
1.27 |
1.16 |
74.82 |
32.73 |
109.80 |
9 |
12.67 |
| CleanCook |
MUS |
Nov-17 |
0.81 |
0.94 |
0.53 |
20.20 |
18.75 |
16.40 |
9 |
21.44 |
| CleanCook |
MUS |
Dec-17 |
0.66 |
0.67 |
0.61 |
19.51 |
17.62 |
19.28 |
9 |
23.11 |
| CleanCook |
MUS |
Jan-18 |
0.67 |
0.50 |
0.75 |
18.31 |
3.75 |
24.63 |
8 |
22.38 |
| CleanCook |
MUS |
Feb-18 |
0.88 |
0.17 |
1.28 |
24.44 |
3.33 |
37.85 |
6 |
9.17 |
| CleanCook |
SHO |
Oct-17 |
0.91 |
0.82 |
0.33 |
28.83 |
28.21 |
15.65 |
6 |
15.17 |
| CleanCook |
SHO |
Nov-17 |
0.70 |
0.81 |
0.37 |
21.46 |
24.44 |
12.21 |
6 |
19.17 |
| CleanCook |
SHO |
Dec-17 |
0.58 |
0.54 |
0.31 |
14.21 |
10.81 |
9.16 |
6 |
17.50 |
| CleanCook |
SHO |
Jan-18 |
0.75 |
0.40 |
0.87 |
19.64 |
10.77 |
26.58 |
6 |
10.50 |
| CleanCook |
SHO |
Feb-18 |
0.69 |
0.60 |
0.74 |
4.41 |
0.00 |
6.86 |
6 |
7.83 |
| Kerosene |
AKO |
Oct-17 |
1.51 |
1.83 |
0.61 |
93.49 |
106.09 |
49.04 |
3 |
16.33 |
| Kerosene |
AKO |
Nov-17 |
1.42 |
0.73 |
1.87 |
86.67 |
34.55 |
121.43 |
3 |
11.00 |
| Kerosene |
AKO |
Dec-17 |
1.20 |
0.64 |
1.04 |
79.83 |
28.70 |
91.38 |
3 |
21.33 |
| Kerosene |
AKO |
Jan-18 |
1.15 |
1.18 |
0.78 |
88.80 |
66.56 |
77.35 |
8 |
16.25 |
| Kerosene |
AKO |
Feb-18 |
0.96 |
1.00 |
0.75 |
63.84 |
48.57 |
59.75 |
8 |
4.88 |
| Kerosene |
MUS |
Oct-17 |
2.15 |
1.75 |
1.22 |
140.63 |
119.00 |
147.65 |
7 |
13.29 |
| Kerosene |
MUS |
Nov-17 |
1.77 |
1.80 |
1.25 |
121.91 |
116.67 |
136.34 |
9 |
20.22 |
| Kerosene |
MUS |
Dec-17 |
1.92 |
1.88 |
1.01 |
121.56 |
88.08 |
86.06 |
8 |
24.50 |
| Kerosene |
MUS |
Jan-18 |
1.84 |
1.89 |
1.05 |
115.83 |
96.94 |
64.21 |
8 |
24.50 |
| Kerosene |
MUS |
Feb-18 |
1.88 |
1.69 |
1.24 |
114.58 |
110.62 |
66.25 |
7 |
10.86 |
| Kerosene |
SHO |
Oct-17 |
1.81 |
2.01 |
1.41 |
116.83 |
123.97 |
89.27 |
6 |
13.17 |
| Kerosene |
SHO |
Nov-17 |
1.65 |
1.24 |
1.85 |
112.82 |
52.98 |
142.06 |
6 |
18.00 |
| Kerosene |
SHO |
Dec-17 |
1.06 |
1.17 |
0.97 |
77.78 |
82.50 |
75.80 |
6 |
12.67 |
| Kerosene |
SHO |
Jan-18 |
1.83 |
1.79 |
0.39 |
149.21 |
128.95 |
62.17 |
4 |
16.25 |
| Kerosene |
SHO |
Feb-18 |
1.38 |
0.80 |
1.63 |
77.04 |
70.00 |
84.24 |
5 |
10.40 |
| LPG |
AKO |
Oct-17 |
0.90 |
0.90 |
NA |
31.50 |
31.50 |
NA |
1 |
20.00 |
| LPG |
AKO |
Nov-17 |
1.00 |
1.00 |
NA |
20.00 |
20.00 |
NA |
1 |
1.00 |
| LPG |
AKO |
Dec-17 |
1.73 |
1.73 |
NA |
48.67 |
48.67 |
NA |
1 |
30.00 |
| LPG |
AKO |
Jan-18 |
1.00 |
1.00 |
NA |
26.07 |
26.07 |
NA |
1 |
28.00 |
| LPG |
AKO |
Feb-18 |
0.50 |
0.50 |
NA |
5.71 |
5.71 |
NA |
1 |
14.00 |
| LPG |
SHO |
Oct-17 |
1.33 |
1.33 |
NA |
59.33 |
59.33 |
NA |
1 |
15.00 |
| LPG |
SHO |
Nov-17 |
0.54 |
0.53 |
0.46 |
18.58 |
9.84 |
24.61 |
4 |
18.25 |
| LPG |
SHO |
Dec-17 |
1.10 |
1.39 |
0.93 |
28.13 |
35.71 |
23.28 |
3 |
18.33 |
| LPG |
SHO |
Jan-18 |
1.00 |
1.00 |
1.41 |
34.38 |
34.38 |
48.61 |
2 |
13.50 |
| LPG |
SHO |
Feb-18 |
0.48 |
0.00 |
0.82 |
24.29 |
0.00 |
42.06 |
3 |
11.67 |
#Write summary stats to an excel workbook
list_of_datasets <- list("By Stove Group and HHID" = stats_grouped_by_hhid_stove, "By Stove Group" = stats_by_stove_group,
"By Stove Type" = stats_by_stove,"By HHID and Monthly" = stats_grouped_by_hhid_stove_month,
"By Group and Monthly" = stats_by_stove_group_month)
openxlsx::write.xlsx(list_of_datasets, file = paste0("Summary Statistics ",format(now(),"%y-%m-%d"),".xlsx"))
Plot all data time series
- Plot usage rates (uses/day) by stove type, and region
#Plot time series for each household, colored by filename. For data completion purposes.
field_timeseries_plot(sumsarized_filtered, "stove_temp", "datetime", "HHID", "stove_descriptions","qc")

Plot data time series, removing flagged files and households
- Plot usage rates (uses/day) by stove type, and region
#Plot time series for each household, colored by filename. Filtered out bad qc data
field_timeseries_plot(dplyr::filter(sumsarized_filtered,grepl("ok",qc)),"stove_temp", "datetime", "HHID", "stove_descriptions","qc")
